Set the Root Directory for the R Notebook

Set Working Directory

# Make the project folder under the user's Desktop the working directory.
setwd(dir = "~/Desktop/af-werx")

Getting working Directory

# Print the current working directory to confirm where the session is running.
getwd()
## [1] "/Users/datasociety/Desktop/af-werx/data"

Create the data and plot directory paths, then set the working directory to the data directory

# Project root and the path of the data folder beneath it; the data path is
# printed for confirmation.
main_dir <- "~/Desktop/af-werx"
data_dir <- file.path(main_dir, "data")
data_dir
## [1] "~/Desktop/af-werx/data"
# Path of the plots folder under the project root, printed for confirmation.
plot_dir <- file.path(main_dir, "plots")
plot_dir
## [1] "~/Desktop/af-werx/plots"
# Switch the working directory to the data folder and print it to confirm.
setwd(data_dir)
getwd()
## [1] "/Users/datasociety/Desktop/af-werx/data"
#load("tidyr_tables.RData")
# Copy the flights table out of the nycflights13 package into the session.
flights <- nycflights13::flights

Static Plot Example

# Base-graphics plot of the values against their index.
plot(c(1, 4, 6, 7, 8, 11, 44, 66))

# read data from the sample data file

# Read the sample CSV from the working directory. The first row supplies the
# column names and text columns are kept as character vectors.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned, which would silently change these arguments.
CMP <- read.csv(
  "ChemicalManufacturingProcess.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

take a look at the data

#View(CMP)
# Column positions to keep: 1 through 4 and 14 through 16 (printed below).
column_ids <- c(seq_len(4), 14:16)
column_ids
## [1]  1  2  3  4 14 15 16

subset the data to the selected columns and display the structure of the first few variables

# Keep only the selected columns, then inspect the structure of the result.
CMP_subset <- CMP[, column_ids]
str(CMP_subset)
## 'data.frame':    176 obs. of  7 variables:
##  $ Yield                 : num  38 42.4 42 41.4 42.5 ...
##  $ BiologicalMaterial01  : num  6.25 8.01 8.01 8.01 7.47 6.12 7.48 6.94 6.94 6.94 ...
##  $ BiologicalMaterial02  : num  49.6 61 61 61 63.3 ...
##  $ BiologicalMaterial03  : num  57 67.5 67.5 67.5 72.2 ...
##  $ ManufacturingProcess01: num  NA 0 0 0 10.7 12 11.5 12 12 12 ...
##  $ ManufacturingProcess02: num  NA 0 0 0 0 0 0 0 0 0 ...
##  $ ManufacturingProcess03: num  NA NA NA NA NA NA 1.56 1.55 1.56 1.55 ...
# Min, quartiles, median, mean and max of the Yield column.
summary(CMP_subset$Yield)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   35.25   38.75   39.97   40.18   41.48   46.34

display a simple boxplot

# Default boxplot of the Yield column.
boxplot(CMP_subset$Yield)

# make it orange and give it a title

# Same boxplot, filled orange and given a main title.
boxplot(
  CMP_subset$Yield,
  col = "orange",
  main = "Yield Summary"
)

# display the first six built-in color names (indices 1:6)

# First six entries of the built-in color name vector.
colors()[seq_len(6)]
## [1] "white"         "aliceblue"     "antiquewhite"  "antiquewhite1"
## [5] "antiquewhite2" "antiquewhite3"

# display color help and examples

# Open the help page for colors(), then run the demo that shows the named
# colors (its transcript follows below).
?colors
demo(colors)
## 
## 
##  demo(colors)
##  ---- ~~~~~~
## 
## > ### ----------- Show (almost) all named colors ---------------------
## > 
## > ## 1) with traditional 'graphics' package:
## > showCols1 <- function(bg = "gray", cex = 0.75, srt = 30) {
## +     m <- ceiling(sqrt(n <- length(cl <- colors())))
## +     length(cl) <- m*m; cm <- matrix(cl, m)
## +     ##
## +     require("graphics")
## +     op <- par(mar=rep(0,4), ann=FALSE, bg = bg); on.exit(par(op))
## +     plot(1:m,1:m, type="n", axes=FALSE)
## +     text(col(cm), rev(row(cm)), cm,  col = cl, cex=cex, srt=srt)
## + }
## 
## > showCols1()
## 
## > ## 2) with 'grid' package:
## > showCols2 <- function(bg = "grey", cex = 0.75, rot = 30) {
## +     m <- ceiling(sqrt(n <- length(cl <- colors())))
## +     length(cl) <- m*m; cm <- matrix(cl, m)
## +     ##
## +     require("grid")
## +     grid.newpage(); vp <- viewport(w = .92, h = .92)
## +     grid.rect(gp=gpar(fill=bg))
## +     grid.text(cm, x = col(cm)/m, y = rev(row(cm))/m, rot = rot,
## +               vp=vp, gp=gpar(cex = cex, col = cm))
## + }
## 
## > showCols2()
## Loading required package: grid

## 
## > showCols2(bg = "gray33")

## 
## > ###
## > 
## > ##' @title Comparing Colors
## > ##' @param col
## > ##' @param nrow
## > ##' @param ncol
## > ##' @param txt.col
## > ##' @return the grid layout, invisibly
## > ##' @author Marius Hofert, originally
## > plotCol <- function(col, nrow=1, ncol=ceiling(length(col) / nrow),
## +                     txt.col="black") {
## +     stopifnot(nrow >= 1, ncol >= 1)
## +     if(length(col) > nrow*ncol)
## +         warning("some colors will not be shown")
## +     require(grid)
## +     grid.newpage()
## +     gl <- grid.layout(nrow, ncol)
## +     pushViewport(viewport(layout=gl))
## +     ic <- 1
## +     for(i in 1:nrow) {
## +         for(j in 1:ncol) {
## +             pushViewport(viewport(layout.pos.row=i, layout.pos.col=j))
## +             grid.rect(gp= gpar(fill=col[ic]))
## +             grid.text(col[ic], gp=gpar(col=txt.col))
## +             upViewport()
## +             ic <- ic+1
## +         }
## +     }
## +     upViewport()
## +     invisible(gl)
## + }
## 
## > ## A Chocolate Bar of colors:
## > plotCol(c("#CC8C3C", paste0("chocolate", 2:4),
## +           paste0("darkorange", c("",1:2)), paste0("darkgoldenrod", 1:2),
## +           "orange", "orange1", "sandybrown", "tan1", "tan2"),
## +         nrow=2)

## 
## > ##' Find close R colors() to a given color {original by Marius Hofert)
## > ##' using Euclidean norm in (HSV / RGB / ...) color space
## > nearRcolor <- function(rgb, cSpace = c("hsv", "rgb255", "Luv", "Lab"),
## +                        dist = switch(cSpace, "hsv" = 0.10, "rgb255" = 30,
## +                        "Luv" = 15, "Lab" = 12))
## + {
## +     if(is.character(rgb)) rgb <- col2rgb(rgb)
## +     stopifnot(length(rgb <- as.vector(rgb)) == 3)
## +     Rcol <- col2rgb(.cc <- colors())
## +     uniqC <- !duplicated(t(Rcol)) # gray9 == grey9 (etc)
## +     Rcol <- Rcol[, uniqC] ; .cc <- .cc[uniqC]
## +     cSpace <- match.arg(cSpace)
## +     convRGB2 <- function(Rgb, to)
## +         t(convertColor(t(Rgb), from="sRGB", to=to, scale.in=255))
## +     ## the transformation,  rgb{0..255} --> cSpace :
## +     TransF <- switch(cSpace,
## +                      "rgb255" = identity,
## +                      "hsv" = rgb2hsv,
## +                      "Luv" = function(RGB) convRGB2(RGB, "Luv"),
## +                      "Lab" = function(RGB) convRGB2(RGB, "Lab"))
## +     d <- sqrt(colSums((TransF(Rcol) - as.vector(TransF(rgb)))^2))
## +     iS <- sort.list(d[near <- d <= dist])# sorted: closest first
## +     setNames(.cc[near][iS], format(zapsmall(d[near][iS]), digits=3))
## + }
## 
## > nearRcolor(col2rgb("tan2"), "rgb")
##          0.0         21.1         25.8         29.5 
##       "tan2"       "tan1" "sandybrown"    "sienna1" 
## 
## > nearRcolor(col2rgb("tan2"), "hsv")
##       0.0000       0.0410       0.0618       0.0638       0.0667 
##       "tan2"    "sienna2"     "coral2"    "tomato2"       "tan1" 
##       0.0766       0.0778       0.0900       0.0912       0.0918 
##      "coral"    "sienna1" "sandybrown"     "coral1"     "tomato" 
## 
## > nearRcolor(col2rgb("tan2"), "Luv")
##         0.00         7.42         7.48        12.41        13.69 
##       "tan2"       "tan1" "sandybrown"    "orange3"    "orange2" 
## 
## > nearRcolor(col2rgb("tan2"), "Lab")
##         0.00         5.56         8.08        11.31 
##       "tan2"       "tan1" "sandybrown"       "peru" 
## 
## > nearRcolor("#334455")
##          0.0867 
## "darkslategray" 
## 
## > ## Now, consider choosing a color by looking in the
## > ## neighborhood of one you know :
## > 
## > plotCol(nearRcolor("deepskyblue", "rgb", dist=50))

## 
## > plotCol(nearRcolor("deepskyblue", dist=.1))

## 
## > plotCol(nearRcolor("tomato", "rgb", dist= 50), nrow=3)

## 
## > plotCol(nearRcolor("tomato", "hsv", dist=.12), nrow=3)

## 
## > plotCol(nearRcolor("tomato", "Luv", dist= 25), nrow=3)

## 
## > plotCol(nearRcolor("tomato", "Lab", dist= 18), nrow=3)

# get a random sample of colors, one per column

# Fix the RNG seed so the draw is reproducible, then pick one random color
# name per column of CMP_subset and print the selection.
set.seed(2)
n_cols <- ncol(CMP_subset)
col_sample <- sample(colors(), size = n_cols)
col_sample
## [1] "lightgray"      "lavenderblush4" "grey12"         "grey88"        
## [5] "gray51"         "ivory4"         "grey36"

use colors in boxplot

# Boxplot of every column in CMP_subset, colored with the sampled color names.
boxplot(CMP_subset, col = col_sample)

# display histogram data without plotting

# Compute the histogram of Yield without drawing it; the returned object
# (breaks, counts, density, mids, ...) is printed instead.
# FALSE is spelled out because F is a reassignable variable, not a constant.
hist(CMP_subset$Yield, plot = FALSE)
## $breaks
##  [1] 35 36 37 38 39 40 41 42 43 44 45 46 47
## 
## $counts
##  [1]  1  3 16 31 39 32 21 20 10  2  0  1
## 
## $density
##  [1] 0.005681818 0.017045455 0.090909091 0.176136364 0.221590909
##  [6] 0.181818182 0.119318182 0.113636364 0.056818182 0.011363636
## [11] 0.000000000 0.005681818
## 
## $mids
##  [1] 35.5 36.5 37.5 38.5 39.5 40.5 41.5 42.5 43.5 44.5 45.5 46.5
## 
## $xname
## [1] "CMP_subset$Yield"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"

plot a histogram filled with a vector of sample colors, with a custom x-axis label and title

# Histogram of Yield, filled from the first three sampled colors, with a
# custom x-axis label and main title.
hist(
  CMP_subset$Yield,
  col = col_sample[1:3],
  xlab = "Yield",
  main = "Dist. of Yield"
)

# using par(mfrow = c(rows, cols)) to set the layout of the histograms below

# Draw the next two histograms side by side (1 row, 2 columns).
# par() returns the previous settings; they are restored afterwards so that
# later plots are not left in the two-panel layout.
old_par <- par(mfrow = c(1, 2))
hist(
  CMP_subset$BiologicalMaterial01,
  col = col_sample[2],
  xlab = "Bio Material 1",
  main = "Dist. of Bio Material 1"
)
hist(
  CMP_subset$BiologicalMaterial02,
  col = col_sample[3],
  xlab = "Bio Material 2",
  main = "Dist. of Bio Material 2"
)
par(old_par)

# display three histograms in a plot layout of 1 row, 3 columns

# Draw three histograms in a single row (1 row, 3 columns).
# par() returns the previous settings; they are restored afterwards so that
# later plots are not left in the three-panel layout.
old_par <- par(mfrow = c(1, 3))
hist(
  CMP_subset$BiologicalMaterial01,
  col = col_sample[2],
  xlab = "Bio Material 1",
  main = "Dist. of Bio Material 1"
)
hist(
  CMP_subset$BiologicalMaterial02,
  col = col_sample[3],
  xlab = "Bio Material 2",
  main = "Dist. of Bio Material 2"
)
hist(
  CMP_subset$BiologicalMaterial03,
  col = col_sample[4],
  xlab = "Bio Material 3",
  main = "Dist. of Bio Material 3"
)
par(old_par)

# create a scatterplot with a title, x/y-axis labels, and a modified pch symbol at 2x enlargement, color = steelblue

# Scatterplot of column 2 of CMP_subset (x) against column 1 (y), drawn with
# plotting symbol 4 at twice the default size in steel blue.
plot(
  x = CMP_subset[, 2],
  y = CMP_subset[, 1],
  xlab = "Bio Material 1",
  ylab = "Yield",
  main = "Bio. Material 1 vs Yield",
  pch = 4,
  cex = 2,
  col = "steelblue"
)

# scatterplot matrix for quick analysis of pairwise relationships

# Scatterplot matrix of the first four columns of CMP_subset, steel-blue
# points drawn with plotting symbol 19.
pairs(CMP_subset[,1:4], pch=19, col="steelblue")

# install/load corrplot package

# Installation is left disabled; attach corrplot for the plots below.
#install.packages("corrplot")
library(corrplot)
## corrplot 0.84 loaded
# Show summary information about the corrplot package.
library(help="corrplot")

compute the correlation matrix (covariance scaled to a standard deviation of 1) to chart with corrplot

# Pairwise correlations between the first four columns of CMP_subset.
CMP_cor <- cor(CMP_subset[, 1:4])
#View(CMP_cor)

render it with a different visualization method (pie charts)

# Draw the correlation matrix using the "pie" visualization method.
corrplot(CMP_cor, method = "pie")

# display mixed corrplot

# Draw the correlation matrix with corrplot's mixed display (default settings).
corrplot.mixed(CMP_cor)

#install.packages("ggplot2")
# Attach ggplot2 and open its help page.
library(ggplot2)
?ggplot2
# Base plot object: CMP_subset with Yield mapped to x. No layers yet, so
# printing it shows only the empty panel.
ggp1 <- ggplot(data = CMP_subset, mapping = aes(x = Yield))
ggp1

# Layered geom_histogram with binwidth 0.75, outline/fill color

# Add a histogram layer with y mapped to density rather than count, using
# 0.75-wide bins, a steel-blue outline and gray fill.
# NOTE(review): `..density..` is deprecated as of ggplot2 3.4.0 in favour of
# `after_stat(density)`; kept as-is for compatibility with older installs.
ggp1 <- ggp1 +
  geom_histogram(
    aes(y = ..density..),
    binwidth = 0.75,
    color = "steelblue",
    fill = "gray"
  )

layer density on top of histogram

# Overlay a half-transparent density curve (gray outline, steel-blue fill),
# then render the plot.
ggp1 <- ggp1 +
  geom_density(alpha = .5, color = "gray", fill = "steelblue")
ggp1

# using labs function to add title and subtitle

# Add a title and subtitle, then render the plot.
# The title previously read "Distrobution"; the spelling is corrected because
# the text appears on the rendered figure.
ggp1 <- ggp1 +
  labs(title = "Distribution", subtitle = "Histogram & Density")
ggp1

# Base plot object: BiologicalMaterial01 on x, Yield on y. Printed once with
# no layers so the empty panel is visible.
ggp2 <- ggplot(
  data = CMP_subset,
  mapping = aes(x = BiologicalMaterial01, y = Yield)
)
ggp2

# Add a point layer with default styling and render again.
ggp2 <- ggp2 + geom_point()
ggp2

# Add dark-orange points, a linear-model smoother, and a title/subtitle, then
# render the plot.
# NOTE(review): ggp2 already carries a default geom_point() layer at this
# stage, so the orange points are drawn on top of it.
ggp2 <- ggp2 +
  geom_point(color = "darkorange") +
  geom_smooth(method = lm) +
  labs(
    title = "Bio. Material 1 vs Yield",
    subtitle = "Scatterplot with linear fit"
  )
ggp2

# Reusable theme: black-and-white base with larger axis titles, axis text,
# plot title and subtitle.
ggtheme1 <- theme_bw() +
  theme(
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 16),
    plot.title = element_text(size = 25),
    plot.subtitle = element_text(size = 18)
  )

# Apply the theme to the scatterplot and render it.
ggp2 <- ggp2 + ggtheme1
ggp2